{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "from tqdm import tqdm\n",
    "from matplotlib import pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "df=pd.read_csv(r'C:\\Users\\Yasaman\\Downloads\\WRP_national.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2010, 1990, 1985, 1970], dtype=int64)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(['name'])['year'].max().reset_index()['year'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df=df[df['year']==2010]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['chrstgenpct',\n",
       " 'judgenpct',\n",
       " 'islmgenpct',\n",
       " 'budgenpct',\n",
       " 'zorogenpct',\n",
       " 'hindgenpct',\n",
       " 'sikhgenpct',\n",
       " 'shntgenpct',\n",
       " 'bahgenpct',\n",
       " 'taogenpct',\n",
       " 'jaingenpct',\n",
       " 'confgenpct',\n",
       " 'syncgenpct',\n",
       " 'anmgenpct',\n",
       " 'othrgenpct',\n",
       " 'nonreligpct']"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "percentage_columns=[c for c in df.columns if ('genpct' in c) ]\n",
    "percentage_columns+=['nonreligpct']\n",
    "percentage_columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['most']=df[percentage_columns].idxmax(axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "reg_map={'chrstgenpct':'Christianity',\n",
    " 'judgenpct':'Judaism',\n",
    " 'islmgenpct':'Islam',\n",
    " 'budgenpct':'Buddhism',\n",
    " 'zorogenpct':'Zoroastrian',\n",
    " 'hindgenpct':'Hindu',\n",
    " 'sikhgenpct':'Sikh',\n",
    " 'shntgenpct':'Shinto',\n",
    " 'bahgenpct':'Baha’i',\n",
    " 'taogenpct':'Taoism',\n",
    " 'jaingenpct':'Jain',\n",
    " 'confgenpct':'Confucianism',\n",
    " 'syncgenpct':'Syncretic Religions',\n",
    " 'anmgenpct':'Animist Religions',\n",
    " 'othrgenpct':'Other Religions',\n",
    " 'nonreligpct':'Non. Religious'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "cow2iso=pd.read_csv(r\"C:\\Users\\Yasaman\\Arab_spring_scholarly_attention\\Data\\Religion\\cow2iso.csv\")\n",
    "cow2iso_map={cow2iso.iloc[i]['cow3']:cow2iso.iloc[i]['iso3'] for i in range(len(cow2iso)) if cow2iso.iloc[i]['cow3']}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['name_iso']=df['name'].map(cow2iso_map)\n",
    "df['most_religion']=df['most'].map(reg_map)\n",
    "df=df[~df['name_iso'].isna()].reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>year</th>\n",
       "      <th>state</th>\n",
       "      <th>name</th>\n",
       "      <th>chrstprot</th>\n",
       "      <th>chrstcat</th>\n",
       "      <th>chrstorth</th>\n",
       "      <th>chrstang</th>\n",
       "      <th>chrstothr</th>\n",
       "      <th>chrstgen</th>\n",
       "      <th>judorth</th>\n",
       "      <th>...</th>\n",
       "      <th>dualrelig</th>\n",
       "      <th>datatype</th>\n",
       "      <th>sourcereliab</th>\n",
       "      <th>recreliab</th>\n",
       "      <th>reliabilevel</th>\n",
       "      <th>Version</th>\n",
       "      <th>sourcecode</th>\n",
       "      <th>most</th>\n",
       "      <th>name_iso</th>\n",
       "      <th>most_religion</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2010</td>\n",
       "      <td>2</td>\n",
       "      <td>USA</td>\n",
       "      <td>119755961</td>\n",
       "      <td>78397889</td>\n",
       "      <td>7036875</td>\n",
       "      <td>4847625</td>\n",
       "      <td>23072957</td>\n",
       "      <td>233111307</td>\n",
       "      <td>1051778</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>24</td>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "      <td>High</td>\n",
       "      <td>1.1</td>\n",
       "      <td>25</td>\n",
       "      <td>chrstgenpct</td>\n",
       "      <td>USA</td>\n",
       "      <td>Christianity</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2010</td>\n",
       "      <td>20</td>\n",
       "      <td>CAN</td>\n",
       "      <td>7929127</td>\n",
       "      <td>14497030</td>\n",
       "      <td>785000</td>\n",
       "      <td>2722447</td>\n",
       "      <td>497500</td>\n",
       "      <td>26431104</td>\n",
       "      <td>68061</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>34</td>\n",
       "      <td>7</td>\n",
       "      <td>28</td>\n",
       "      <td>Low</td>\n",
       "      <td>1.1</td>\n",
       "      <td>26</td>\n",
       "      <td>chrstgenpct</td>\n",
       "      <td>CAN</td>\n",
       "      <td>Christianity</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010</td>\n",
       "      <td>31</td>\n",
       "      <td>BHM</td>\n",
       "      <td>211798</td>\n",
       "      <td>43863</td>\n",
       "      <td>0</td>\n",
       "      <td>46996</td>\n",
       "      <td>0</td>\n",
       "      <td>302657</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>24</td>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "      <td>High</td>\n",
       "      <td>1.1</td>\n",
       "      <td>83</td>\n",
       "      <td>chrstgenpct</td>\n",
       "      <td>BHS</td>\n",
       "      <td>Christianity</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2010</td>\n",
       "      <td>40</td>\n",
       "      <td>CUB</td>\n",
       "      <td>550000</td>\n",
       "      <td>6744696</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>112411</td>\n",
       "      <td>7407107</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>24</td>\n",
       "      <td>10</td>\n",
       "      <td>20</td>\n",
       "      <td>Medium</td>\n",
       "      <td>1.1</td>\n",
       "      <td>78</td>\n",
       "      <td>chrstgenpct</td>\n",
       "      <td>CUB</td>\n",
       "      <td>Christianity</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2010</td>\n",
       "      <td>41</td>\n",
       "      <td>HAI</td>\n",
       "      <td>976083</td>\n",
       "      <td>7027799</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8003882</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>24</td>\n",
       "      <td>5</td>\n",
       "      <td>10</td>\n",
       "      <td>Medium</td>\n",
       "      <td>1.1</td>\n",
       "      <td>1</td>\n",
       "      <td>chrstgenpct</td>\n",
       "      <td>HTI</td>\n",
       "      <td>Christianity</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>184</th>\n",
       "      <td>2010</td>\n",
       "      <td>970</td>\n",
       "      <td>NAU</td>\n",
       "      <td>6459</td>\n",
       "      <td>3179</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9638</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>24</td>\n",
       "      <td>5</td>\n",
       "      <td>10</td>\n",
       "      <td>Medium</td>\n",
       "      <td>1.1</td>\n",
       "      <td>83</td>\n",
       "      <td>chrstgenpct</td>\n",
       "      <td>NRU</td>\n",
       "      <td>Christianity</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>185</th>\n",
       "      <td>2010</td>\n",
       "      <td>983</td>\n",
       "      <td>MSI</td>\n",
       "      <td>42561</td>\n",
       "      <td>4910</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3500</td>\n",
       "      <td>50971</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>24</td>\n",
       "      <td>5</td>\n",
       "      <td>10</td>\n",
       "      <td>Medium</td>\n",
       "      <td>1.1</td>\n",
       "      <td>83</td>\n",
       "      <td>chrstgenpct</td>\n",
       "      <td>MHL</td>\n",
       "      <td>Christianity</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>186</th>\n",
       "      <td>2010</td>\n",
       "      <td>986</td>\n",
       "      <td>PAL</td>\n",
       "      <td>3700</td>\n",
       "      <td>12891</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1800</td>\n",
       "      <td>18391</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>24</td>\n",
       "      <td>5</td>\n",
       "      <td>10</td>\n",
       "      <td>Medium</td>\n",
       "      <td>1.1</td>\n",
       "      <td>83</td>\n",
       "      <td>chrstgenpct</td>\n",
       "      <td>PLW</td>\n",
       "      <td>Christianity</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>187</th>\n",
       "      <td>2010</td>\n",
       "      <td>987</td>\n",
       "      <td>FSM</td>\n",
       "      <td>33500</td>\n",
       "      <td>57500</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5000</td>\n",
       "      <td>96000</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>24</td>\n",
       "      <td>5</td>\n",
       "      <td>10</td>\n",
       "      <td>Medium</td>\n",
       "      <td>1.1</td>\n",
       "      <td>83</td>\n",
       "      <td>chrstgenpct</td>\n",
       "      <td>FSM</td>\n",
       "      <td>Christianity</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>188</th>\n",
       "      <td>2010</td>\n",
       "      <td>990</td>\n",
       "      <td>WSM</td>\n",
       "      <td>139757</td>\n",
       "      <td>35883</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4500</td>\n",
       "      <td>180140</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>24</td>\n",
       "      <td>5</td>\n",
       "      <td>10</td>\n",
       "      <td>Medium</td>\n",
       "      <td>1.1</td>\n",
       "      <td>83</td>\n",
       "      <td>chrstgenpct</td>\n",
       "      <td>WSM</td>\n",
       "      <td>Christianity</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>189 rows × 87 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     year  state name  chrstprot  chrstcat  chrstorth  chrstang  chrstothr  \\\n",
       "0    2010      2  USA  119755961  78397889    7036875   4847625   23072957   \n",
       "1    2010     20  CAN    7929127  14497030     785000   2722447     497500   \n",
       "2    2010     31  BHM     211798     43863          0     46996          0   \n",
       "3    2010     40  CUB     550000   6744696          0         0     112411   \n",
       "4    2010     41  HAI     976083   7027799          0         0          0   \n",
       "..    ...    ...  ...        ...       ...        ...       ...        ...   \n",
       "184  2010    970  NAU       6459      3179          0         0          0   \n",
       "185  2010    983  MSI      42561      4910          0         0       3500   \n",
       "186  2010    986  PAL       3700     12891          0         0       1800   \n",
       "187  2010    987  FSM      33500     57500          0         0       5000   \n",
       "188  2010    990  WSM     139757     35883          0         0       4500   \n",
       "\n",
       "      chrstgen  judorth  ...  dualrelig  datatype  sourcereliab  recreliab  \\\n",
       "0    233111307  1051778  ...          0        24             4          7   \n",
       "1     26431104    68061  ...          0        34             7         28   \n",
       "2       302657        0  ...          0        24             4          8   \n",
       "3      7407107        0  ...          1        24            10         20   \n",
       "4      8003882        0  ...          1        24             5         10   \n",
       "..         ...      ...  ...        ...       ...           ...        ...   \n",
       "184       9638        0  ...          0        24             5         10   \n",
       "185      50971        0  ...          0        24             5         10   \n",
       "186      18391        0  ...          0        24             5         10   \n",
       "187      96000        0  ...          0        24             5         10   \n",
       "188     180140        0  ...          0        24             5         10   \n",
       "\n",
       "     reliabilevel  Version  sourcecode         most  name_iso  most_religion  \n",
       "0            High      1.1          25  chrstgenpct       USA   Christianity  \n",
       "1             Low      1.1          26  chrstgenpct       CAN   Christianity  \n",
       "2            High      1.1          83  chrstgenpct       BHS   Christianity  \n",
       "3          Medium      1.1          78  chrstgenpct       CUB   Christianity  \n",
       "4          Medium      1.1           1  chrstgenpct       HTI   Christianity  \n",
       "..            ...      ...         ...          ...       ...            ...  \n",
       "184        Medium      1.1          83  chrstgenpct       NRU   Christianity  \n",
       "185        Medium      1.1          83  chrstgenpct       MHL   Christianity  \n",
       "186        Medium      1.1          83  chrstgenpct       PLW   Christianity  \n",
       "187        Medium      1.1          83  chrstgenpct       FSM   Christianity  \n",
       "188        Medium      1.1          83  chrstgenpct       WSM   Christianity  \n",
       "\n",
       "[189 rows x 87 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[['most_religion', 'name_iso']].reset_index(drop=True).to_csv('cleaned_religion.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
